#include "draft.h"
#include "mtp.h"
#include <glib.h>
#include <math.h>
#include <stdlib.h>
#include <ctype.h>

typedef struct
{
    gchar* name;
    GtkWidget* selector;
    enum {OK, CANCEL} button;
} FileSelectionData;

BOOL next_mtp = TRUE;   
int minolap = 5;                /* Minimum FPC overlap               */
extern int minId;               /* Minimum identity                  */
int minHitLenPer = 90;          /* Minimun hit length percentag      */
int top_num = 10;               /* Number of top left or right clones 
                                 * shown in the list*/
static int seq_error = 5;
static int FPC_error = 5;
static gboolean ordered=FALSE;  /* Indicate the multiple sequence contig
                                 * ordered or not*/
char bssFileLoad[MAXPATHLEN+1]="";
GtkWidget* file_selector;

int input_ctg;       /* Subdirectory contig number of bss file */
int cur_contig;          /* Current contig number              */ 
int max_seqctg;         /* Number of seqctgs read from bss file*/ 
static char bes_name[50]="";    /* Input clone name for alignment     */

char seq_name[50];              /* Sequence name */
CLONE *seq_clone[MAX_SEQCLONE];  /* Sequenced clone list       */
int num_sclone;          /* Number of sequenced clones; Sequence 
                                 * might be in multiple clones in FPC */
int query_x, query_y;    /* Seq clones' x and y coordinate     */

static struct list *clone_list;        /*All clone list                      */

static int minOlapBox, minIdBox, minHitLenBox, bssFileBox;
static char minOlapText[14], minIdText[14], 
            minHitLenText[14], cloneText[50], topnumText[10];

int gEvaluateClone;
extern int gMTP;

extern void file_selection_cb(GtkButton* button, FileSelectionData* sdata);
extern void lowerstring(char *search);
extern BOOL strcasestr_priv(char *line,char *search);

void hit_CR();
static void display_alignment(const char *query_name, const GArray *dbs, const BSS_HIT* hit);
static void load_bss_file(void); 
static void find_clones(void);
static void dp_alignment(void);

static BOOL is_reversed(SeqCtg **s);
int get_curctg(BSS_INST*);
static void gen_list(void);
static void free_list(void);
void get_hits(HitList hits, SeqCtg *seq_ctg, int ctg, GArray *bes,BSS_INST* pbsi);
static void find_contained(SeqCtg **s);
static void add_to_bes(GArray *bes, SeqCtg *seq_ctg, BSS_HIT *t);
int in_bes(GArray *bes, BSS_HIT *t);
int read_seqctgs(SeqCtg *seq_ctgs);
static void print_clonelist(SeqCtg **s, direction mtp_dir, BOOL reversed);
static void reverse_seq(char *str, int size);
static void complement_seq(char *seq, int size);
static void get_queryseq(const char *query_file, const BSS_HIT *hit, int qstart, int qend, char *query_seq);
static void read_queryseq(FILE *fp_query, const int start, const int end, char *query_seq);
static void get_subjectseq(const GArray *dbs, char *subject_seq, const BSS_HIT *hit);
static void get_scores(const char *query_seq, const char *subject_seq, int query_len, int subject_len, Scores **scores);
static void print_alignment(const BSS_HIT *hit, int qalign_len, char *query_align, char *subject_align, int qstart, int max_score);
static int min(int a, int b);
static int max(int a, int b);

void sort_seqctg_asce(SeqCtg **seqctgs, int size);
void sort_hitclone_start(HitClone **hit_clones, int size);
static void sort_hitclone_end(HitClone **hit_clones, int size);

static gint seqctg_compare_asce(const SeqCtg** c1, const SeqCtg** c2);
static gint hitclone_compare_start(const HitClone** h1, const HitClone** h2);
static gint hitclone_compare_end(const HitClone** h1, const HitClone** h2);

/*********************************************************
                  MAIN: ZevaluateClone
 The graphics for the 'Eval Clone Seq->BES Results' window.
*********************************************************/
void ZevaluateClone(){
  float row;
  static MENUOPT amenu[] = { {quitEvaluateClone, "Close"},
                             { graphPrint,"Print Screen"},
                             { 0, 0 } };

  if(graphActivate(gEvaluateClone)){
    graphPop();
  }
  else{
    gEvaluateClone = graphCreate (TEXT_FIT,"Eval Clone Seq->BES Results",.2,.2,.450,.250);
  }

  sprintf(minOlapText,"%d",minolap);
  sprintf(minHitLenText,"%d",minHitLenPer);
  sprintf(minIdText,"%d",minId);
  sprintf(topnumText, "%d", top_num);

  graphClear();
  graphMenu(amenu);

  row = 1.0;
  graphText("Min FPC Overlap:",1,row);
  minOlapBox = graphTextEntry(minOlapText, 4, 18, row,hit_CR);
  
  row+=2.0;
  graphText("HitLen%:",1,row);
  minHitLenBox = graphTextEntry(minHitLenText, 4, 18, row,hit_CR);

  graphText("Identity:",24,row);
  minIdBox = graphTextEntry(minIdText, 4, 41, row,hit_CR);

  row += 2.0;
  graphText("BSS File:",1,row);

  bssFileBox = graphTextScrollEntry(bssFileLoad, MAXPATHLEN, 25, 11, row,hit_CR);
  graphButton("Load...",load_bss_file,37,row);

  row += 2.0;
  graphText("Top hits: ", 1, row);
  graphTextEntry(topnumText, 5, 11, row, hit_CR);
  graphButton("Find next MTP clones", find_clones, 24,row);

  row += 2.0;
  graphText("BES name: ", 1, row);
  graphTextEntry(cloneText, 18, 13, row, hit_CR);
  graphButton("DP alignment", dp_alignment ,32,row); 
  //graphButton("Help",show_eval_help,1,row);

#ifdef ORDER
  row += 2.0;
  graphButton("Order SeqCtgs", order_seqctgs, 1,row);
#endif
 
  row+=2.0;
  graphButton("Close",quitEvaluateClone,1,row);
  
  graphRedraw();
}

void quitEvaluateClone(){
  if(graphActivate(gEvaluateClone)){
    graphDestroy();
  }
}

void hit_CR(){
  char *pos1, *pos2;
  char str_tmp[MAXPATHLEN+1], str_ctg[20];
  int len;

  memset(str_tmp, '\0', MAXPATHLEN+1);
  memset(str_ctg, '\0', 20);

  if(graphActivate(gEvaluateClone)){
    sscanf(minOlapText,"%d", &minolap);
    sscanf(minIdText,"%d", &minId);
    sscanf(minHitLenText,"%d", &minHitLenPer);
    strcpy(bes_name, cloneText);
    sscanf(topnumText, "%d", &top_num);

    if(strcmp(bssFileLoad, "") != 0) {
      ordered = FALSE;
      memset(seq_name, '\0', 50);
      len = strlen(bssFileLoad);
      if(len != 0) {
        if(bssFileLoad[len-1] == '/') {
          printf("Please select a BSS file in the directory.\n");
          return;
        }
        pos1 = strstr(bssFileLoad, ".bss");
        if(pos1 == NULL) {
          printf("Please select a BSS file\n");
          return;
        }
        strncpy(str_tmp, bssFileLoad, pos1-bssFileLoad);
        str_tmp[strlen(str_tmp)]='\0';
        pos2 = strrchr(str_tmp, '/');

        if(pos2 != NULL) 
          strcpy(seq_name, pos2+1);
        else 
          strcpy(seq_name, str_tmp);

        seq_name[strlen(seq_name)] = '\0'; 
        pos2 = strstr(pos1+8, ".bss");
        if(pos2 != NULL) { 
          strncpy(str_ctg, pos1+8, pos2-pos1-8);
          input_ctg = atoi(str_ctg);
        } else 
          input_ctg = -1;
      }
    }

    if(graphActivate(gMTP)) 
      ZselectMTP();
    ZevaluateClone();
  }
}

/*                        DEF : load_bss_file
 *Load the BSS file                          */
static void
load_bss_file(void)
{
    FileSelectionData sdata_ok, sdata_cancel;
    const gchar bss_glob[] = "*.bss";
    gchar* bss_results;

    if(!file_selector) {
      file_selector =
        gtk_file_selection_new("Please select a BSS file or directory");
      bss_results =
        g_strjoin(G_DIR_SEPARATOR_S, GETCURDIR, "BSS_results", "", NULL);
      gtk_file_selection_set_filename(GTK_FILE_SELECTION(file_selector),
                                    bss_results);
      gtk_file_selection_hide_fileop_buttons(GTK_FILE_SELECTION(file_selector));
      sdata_ok.name = NULL;
      sdata_ok.selector = file_selector;
      sdata_ok.button = OK;
      sdata_cancel.name = NULL;
      sdata_cancel.selector = file_selector;
      sdata_cancel.button = CANCEL;
      gtk_signal_connect(
        GTK_OBJECT(GTK_FILE_SELECTION(file_selector)->ok_button),
        "clicked", GTK_SIGNAL_FUNC(file_selection_cb), &sdata_ok);
      gtk_signal_connect(
        GTK_OBJECT(GTK_FILE_SELECTION(file_selector)->cancel_button),
        "clicked", GTK_SIGNAL_FUNC(file_selection_cb), &sdata_cancel);
      gtk_widget_show(file_selector);
      gtk_main();
      if (sdata_ok.name != NULL) {
        strncpy(bssFileLoad, sdata_ok.name, sizeof(bssFileLoad));
        g_free(sdata_ok.name);
        bssFileLoad[sizeof(bssFileLoad) - 1] = '\0';
        if (strlen(bssFileLoad) >= sizeof(bss_glob) - 1 &&
            strcmp(bssFileLoad+ strlen(bssFileLoad) - (sizeof(bss_glob) - 1),
                   "*.bss") == 0)
            bssFileLoad[strlen(bssFileLoad) - (sizeof(bss_glob) - 1)] = '\0';
      }

      file_selector = NULL;
      g_free(bss_results);
      hit_CR(); 
      ZevaluateClone();
    } else 
      return;
}

/*                        DEF : find_clones
 *Find the left and right MTP clones for the query clone*/
static void find_clones() {
  BSS_INST bsi;
  gint ret;
  SeqCtg *seq_ctgs, *seq_ctg, **s;
  GArray **hit_clones; 
  int i, count; 
  HitList hit_list;
  GArray *bes;
  BESInfo *b;
  BOOL reversed;
  
  next_mtp = TRUE;

  hit_CR();

  if(strcmp(bssFileLoad, "") == 0) {
    printf("Please load BSS file first\n");
    return;
  }

  if (!read_bss_file(bssFileLoad, &bsi))
  {
    g_warning("Failed to open %s", bssFileLoad);
    return;
  }


  seq_ctgs = (SeqCtg *)malloc(MAX_SEQCTG*sizeof(SeqCtg));
  if(seq_ctgs == NULL) 
    messcrash("ERROR---out of memory\n");

  max_seqctg = read_seqctgs(seq_ctgs);

  if(max_seqctg > 1) {
    if(!ordered) {
      if(!messQuery("There are multiple sequence contigs. Are they ordered?")) {
        printf("FPC can only evalulate ordered draft sequence.\n");
        return;
      }else
        ordered = TRUE;
    }
  } else if(max_seqctg == 0) {
    printf("There are no SeqCtgs in the BSS file.\n");
    return;
  }

  ret = get_curctg(&bsi);
  if(ret == 2) {
    printf("The sequence is in multiple clones with different contigs.\n");
    for(i = 0; i < num_sclone; i++) 
      printf("Clone %s in contig %d\n", seq_clone[i]->clone, seq_clone[i]->ctg);
    printf("\n");
    return;
  } else if(ret == 1) {
    printf("There is no clone named %s\n\n", seq_name);
    return;
  } else if(ret == 3) {
    printf("%s is in contig 0\n", seq_name);
    return;
  }

  if(input_ctg != -1 && input_ctg != cur_contig) {
    printf("Please select ctg%d.bss\n", cur_contig);
    return;
  }

  s = (SeqCtg **)malloc(max_seqctg*sizeof(SeqCtg *));
  if(s == NULL) 
    messcrash("ERROR---out of memory\n");

  for(i = 0; i < max_seqctg; i++) 
    s[i] = seq_ctgs+i;

  hit_clones = (GArray **)malloc(sizeof(GArray *)*max_seqctg);
  if(hit_clones == NULL) 
    messcrash("ERROR---out of memory\n");

  bes = g_array_new(FALSE, FALSE, sizeof(BESInfo));

  for(i = 0; i < max_seqctg; i++) { 
    seq_ctg = s[i]; 
    hit_list = hits_seqctg_ctg(&bsi, cur_contig,seq_ctg->seqctg);
    hit_clones[i] = g_array_new(FALSE, FALSE, sizeof(HitClone));
    seq_ctg->hits = hit_clones[i];
    if(hit_list != NULL) {
      get_hits(hit_list, seq_ctg, cur_contig, bes,&bsi);
      g_array_free(hit_list, TRUE);
      hit_list = NULL;
    }
  }

  /* Find and set the contained clones that have both ends hit */
  find_contained(s);

  /*Check if the order of SeqCtg is reversed, it is 
   *1, 2, 3, ... or ..., 3, 2, 1*/
  reversed = is_reversed(s);

  /*Print the orientation of SeqCtgs */
  count = 0;
  for(i = 0; i < max_seqctg; i++) { 
    seq_ctg = s[i]; 
    if(seq_ctg->oret == RCY) {
      if(count == 0) 
        printf("\nSeqCtg%d", seq_ctg->seqctg);
      else 
        printf(", SeqCtg%d", seq_ctg->seqctg);
      count++;
    }
  }
  if(count != 0) printf(" are reversed\n");
      
  /*Print out left MTP clone list */ 
  printf("Query clone %s\n", seq_name);
  printf("Ctg%d", cur_contig);
  printf(" %d %d\n", query_x, query_y);
  print_clonelist(s, LEFT, reversed);
  
  /*Print out right MTP clone list */ 
  printf("Query lone %s\n", seq_name);
  printf("Ctg%d", cur_contig);
  printf(" %d %d\n", query_x, query_y); 
  print_clonelist(s, RIGHT, reversed);

  /*Free allocated memories */
  for(i = 0; i < bes->len; i++) {
    b = &g_array_index(bes, BESInfo, i);
    g_ptr_array_free(b->seqctg_list, TRUE);
  } 
  g_array_free(bes, TRUE);

  for(i = 0; i < max_seqctg; i++) 
    g_array_free(hit_clones[i], TRUE);
  free(hit_clones);
  hit_clones = 0; 

  free(s);
  s = 0;
  free(seq_ctgs);  
  seq_ctgs = 0;


}

/*                        DEF : is_reversed  
 *Check if the order of SeqCtgs is reversed.      
 *             
 *If the original order is 1, 2, 3, ..., the order is right. 
 *If the original order is ..., 3, 2, 1, the order is reversed.   
 *Check left MTP clones in the first SeqCtg and the last SeqCtg, 
 *If the middle coordinate of the clone in first SeqCtg is less 
 *than that of the clone in last SeqCtg, the order is right.
 *Othewise, the order is reversed. 
 *If there is no left MTP clones, check the rigth MTP clones   
 *                                                            */ 
static BOOL is_reversed(SeqCtg **s) {
  int i, j, count;
  SeqCtg *seq_ctg;
  HitClone *h;
  BSS_HIT *t;
  int mid1, mid2;
  CLONE *clp;
 
  sort_seqctg_asce(s, max_seqctg);
  
  count = 0;
  for(i = 0; i < max_seqctg; i++) {
    seq_ctg = s[i]; 
    for(j = 0; j < seq_ctg->hits->len; j++) {
      h = &g_array_index(seq_ctg->hits, HitClone, j);
      clp = h->clp;
      t = h->hit;
      /*Left MTP clones */
      if((seq_ctg->oret == RCN && bss_is_rc(t) ) || (seq_ctg->oret == RCY && bss_is_rc(t))) {
        if(count==0) mid1 = (clp->x+clp->y)/2;
        else if(count != 0) mid2 = (clp->x + clp->y)/2;
        count++;
        break;
      }
    }
  }

  if(count < 2) {
    count = 0;
    for(i = max_seqctg-1; i >= 0; i--) {
      seq_ctg = s[i]; 
      for(j = 0; j < seq_ctg->hits->len; j++) {
        h = &g_array_index(seq_ctg->hits, HitClone, j);
        t = h->hit;
        clp = h->clp;
        /*Right MTP clones */
        if((seq_ctg->oret == RCN && bss_is_rc(t)) || (seq_ctg->oret == RCY && bss_is_rc(t))) {
          if(count==0) mid2 = (clp->x+clp->y)/2;
          else if(count != 0) mid1 = (clp->x + clp->y)/2;
          count++;
          break;
        }
      }
    }
  } 
  if(count >= 2) {
    if(mid1 < mid2) return FALSE;
    else return TRUE;
  }
  return FALSE;
}

/*                        DEF : get_curctg
 *Get the current contig.
 * 
 *If the sequence is in one clone or multiple clones of a contig, 
 *get the contig as the current contig;  
 *If the multiple clones are in different contigs, return error  */ 
int get_curctg(BSS_INST* pbsi) {
  int i, j, index=-1;
  struct list *q;
  CLONE *clp;
  char tmp_str[50]; 
  //struct contig *cptr;

  num_sclone = 0;
  cur_contig = 0;
  query_x = INT_MAX;
  query_y = INT_MIN;
  strcpy(tmp_str, seq_name);
  lowerstring(tmp_str);


  if(!fppFind(acedata,seq_name,&index, cloneOrder)){
    /*If no exact name match  */
    gen_list();
    q = clone_list;

    while(q!=NULL){
      clp = arrp(acedata,q->index,CLONE);
      if(strcasestr_priv(clp->clone,tmp_str))
        seq_clone[num_sclone++] = clp;
      q=q->next;
    }
    free_list();

    for(i = 0; i < num_sclone; i++) {
      if(seq_clone[i]->ctg != 0) {
        cur_contig = seq_clone[i]->ctg;
        break;
      }
    }
    if(cur_contig == 0) return 3;

    for(j = i+1; j < num_sclone; j++) {
      if(seq_clone[j]->ctg != cur_contig) 
        return 2;
    }
  } else {  
    clp = arrp(acedata, index, CLONE);
    cur_contig = clp->ctg;
    seq_clone[0] = clp;
    num_sclone = 1;
  }

  find_Clone(cur_contig);
  orderclones(cur_contig);
  g_assert(root != NULL);


  if(num_sclone == 0) 
    return 1;
  
  for(i = 0; i < num_sclone; i++) {
    if(seq_clone[i]->x < query_x) 
      query_x = seq_clone[i]->x;
    if(seq_clone[i]->y > query_y) 
      query_y = seq_clone[i]->y;
  }
  return 0;
}

/*                        DEF : gen_list
 *Generate a list of all clones             */
static void gen_list() {
  struct list *p=NULL;
  int i, j, max;
  BOOL found = FALSE;
 
  max = arrayMax(acedata);
  found = FALSE;
  j=0;
  while(!found){
    if(arrp(acedata,j,CLONE)->clone[0] != '!'){
	  clone_list  = (struct list *)messalloc((sizeof(struct list)));	
	  clone_list->index = j;
	  clone_list->next = NULL;
	  p = clone_list;
	  found = TRUE;
	}
	j++;
  }
  for(i=j;i<max;i++){
     if(p!=NULL){
	p->next  = (struct list *)messalloc((sizeof(struct list)));	
	p=p->next;
	p->index= i;
     }
  }
  if(p!=NULL) p->next = NULL;
}

/*                        DEF : free_list
 *Free the memory of all clone list         */
static void free_list() {
  struct list *p, *temp;

  p= clone_list;
  while(p!=NULL){
    temp=p;
    p=p->next;
    if(!messfree(temp))
      printf("error freeing list memory\n");
  }
}

/*                        DEF : read_seqctgs
 *Read the SeqCtg info from the bss file     */
int read_seqctgs(SeqCtg *seq_ctgs) {
  int count = 0;
  FILE *bss_fp;
  char str_buff[BUFFER_SIZE] = "";
  int seqctg_tmp, size;

  bss_fp = fopen(bssFileLoad, "r");
  if(bss_fp == NULL) {
    printf("Open BSS file %s failed\n", bssFileLoad);
    return 0;
  }

  while(!feof(bss_fp)) {
    fgets(str_buff, BUFFER_SIZE-1, bss_fp);
    if(strstr(str_buff, "SeqCtg")) {
      while(strlen(str_buff) != 0) {
        sscanf(str_buff, "%u %u", &seqctg_tmp, &size);
        seq_ctgs[count].seqctg = seqctg_tmp;
        seq_ctgs[count].size = size;
        seq_ctgs[count].new_seqctg = MAX_SEQCTG;
        seq_ctgs[count].median = -1;
        seq_ctgs[count].hits = NULL;
        seq_ctgs[count].neigh_count = 0;
        seq_ctgs[count].dist_count = 0;
        seq_ctgs[count].parent_score = 0;
        seq_ctgs[count].oret = UNKNOWN;
        seq_ctgs[count].oret_score = 0;
        seq_ctgs[count].included = 0;
        count++;
        fgets(str_buff, BUFFER_SIZE-1, bss_fp);
        str_buff[strlen(str_buff)-1]='\0';
      }
      break;
    }
  }

  if(bss_fp != NULL)
   fclose(bss_fp);
  return count;
}

HitList hits_bes(BSS_INST* pbsi, gchar* bes_name)
{
    assert("implement this!" && 0);
}

/*                        DEF : dp_alignment
 *Show the alignment of the sequenced clone and a givne BES using
 *dynamic programming                                            */
static void dp_alignment() {
  HitList hit_list = NULL;
  BSS_HIT *hit;
  int i;
  BSS_INST bsi;

  hit_CR();

  if(strcmp(bssFileLoad, "") == 0) {
    printf("Please load BSS file first\n");
    return;
  }

  if (!read_bss_file(bssFileLoad,&bsi))
  {
     g_warning("Failed to  open %s", bssFileLoad);
     return;
  }


  hit_list = hits_bes(&bsi, bes_name);
  if(hit_list == NULL) {
    printf("There is no hit by BES %s\n", bes_name);
    return;
  }

  for(i = 0; i < hit_list->len; i++) {
    hit = arrp(bsi.bss_hits,g_array_index(hit_list, int,i),BSS_HIT);
    display_alignment(bsi.query_file, bsi.dbs, hit);
  }
  
  g_array_free(hit_list,TRUE);

}

/*                          DEF: display_alignment
 *Display the alignment of the sequenced clone and an input BES  */
static void display_alignment(const char *query_file, const GArray *dbs, const BSS_HIT* hit) {
  int i, j, start_tmp;
  int qstart, qend, qlen, slen;
  int score_querylen, score_subjectlen, max_score, align_qstart, align_len, qalign_len;
  char *query_seq, *subject_seq, *query_align, *subject_align;
  Scores **scores;

  /*Find the start and end point of the sequence */
  start_tmp = hit->data[BSS_QEND].i - hit->data[BSS_TLEN].i;
  qstart = start_tmp >= 0 ? start_tmp : 0;
  qend = hit->data[BSS_QSTART].i + hit->data[BSS_TEND].i;
  qlen = qend - qstart + 1;
  
  printf("\nQuery clone: %s\n", seq_name); 
  printf("BES: %s, SeqCtg: %d\n", hit->data[BSS_BES].str, hit->data[BSS_SEQCTG].i);  

  query_seq = (char *)calloc((qlen+1), sizeof(char));
  if(query_seq == NULL) 
    messcrash("ERROR---out of memory\n");
    
  subject_seq = (char *)calloc((hit->data[BSS_TLEN].i+1), sizeof(char));
  if(subject_seq == NULL) 
    messcrash("ERROR---out of memory\n");

  /*Read the given sequence range from the sequence file */ 
  get_queryseq(query_file, hit, qstart, qend, query_seq);
  qlen = strlen(query_seq); 

  /*Read the input BES from the BES file */ 
  get_subjectseq(dbs, subject_seq, hit); 
  slen = strlen(subject_seq); 
  g_assert(slen == hit->data[BSS_TLEN].i); 

  if(bss_is_rc(hit)) { 
    reverse_seq(subject_seq, slen);
    complement_seq(subject_seq, slen);
  }
   
  score_querylen = qlen + 1; 
  score_subjectlen = slen + 1;
  
  scores = (Scores **)malloc(score_querylen*sizeof(Scores *));
  if(scores == NULL) 
    messcrash("ERROR---out of memory\n");

  for(i = 0; i < score_querylen; i++) { 
    scores[i] = (Scores *)calloc(score_subjectlen,sizeof(Scores));
    if(scores[i] == NULL) 
      messcrash("ERROR---out of memory\n");
  }

  get_scores(query_seq, subject_seq, score_querylen, score_subjectlen, scores);
  
  query_align = (char *)calloc((score_querylen+score_subjectlen+1), sizeof(char));
  if(query_align == NULL) 
    messcrash("ERROR---out of memory\n");
  
  subject_align = (char *)calloc((score_querylen+score_subjectlen+1), sizeof(char));
  if(subject_align == NULL) 
    messcrash("ERROR---out of memory\n");

  max_score = INT_MIN;
  j = score_subjectlen - 1;
  for(i = 0; i < score_querylen; i++) { 
    if((scores[i]+j)->score > max_score) { 
      max_score = (scores[i]+j)->score;
      align_qstart = i;
    }
  }
  align_len = 0;
  i = align_qstart;
  
  while(i != 0 && j != 0) {
    if((scores[i]+j)->direction == 'U') {
      query_align[align_len] = query_seq[i-1];
      subject_align[align_len] = ' '; 
      i--; 
    } else if((scores[i]+j)->direction == 'L') {
      query_align[align_len] = ' ';
      subject_align[align_len] = subject_seq[j-1]; 
      j--; 
    } else {
      query_align[align_len] = query_seq[i-1];
      subject_align[align_len] = subject_seq[j-1];
      i--;
      j--; 
    }
    align_len++;
  }

  qalign_len = strlen(query_align);
  reverse_seq(query_align, qalign_len);
  reverse_seq(subject_align, qalign_len);

  print_alignment(hit, qalign_len, query_align, subject_align, qstart, max_score);

  /*Free memories allocated  */
  free(query_align); 
  query_align = 0;
  free(subject_align);
  subject_align = 0;
  for(i = 0; i < score_querylen; i++) {
    free(scores[i]); 
    scores[i]=0;
  }
  free(scores);
  scores = 0;
  free(query_seq);
  query_seq = 0;
  free(subject_seq);
  subject_seq = 0;    
}

/*                                 DEF: get_queryseq
 *Get a certain portion of the query sequence(from qstart to qend) into string array query_seq */
static void get_queryseq(const char *query_file, const BSS_HIT *hit, int qstart, int qend, char *query_seq) {
  FILE *fp_query;
  char str_buff[BUFFER_SIZE]="", *str_tmp;
  int seq_ctg;

  fp_query = fopen(query_file, "r");
  g_assert(fp_query != NULL);

  fgets(str_buff, BUFFER_SIZE-1, fp_query);
  str_buff[strlen(str_buff)-1]='\0';

  str_tmp = strstr(str_buff, "Contig");
  if(str_tmp == NULL)  
    read_queryseq(fp_query, qstart, qend, query_seq);
  else {
    while(!feof(fp_query)) {
      if(str_tmp != NULL) {
        sscanf(str_tmp, "Contig%d", &seq_ctg);
        if(seq_ctg == hit->data[BSS_SEQCTG].i) {
          read_queryseq(fp_query, qstart, qend, query_seq); 
          break;
        }
      } 
      fgets(str_buff, BUFFER_SIZE, fp_query);
      str_buff[strlen(str_buff)-1]='\0';
      str_tmp = strstr(str_buff, "Contig");
    } 
  }
  fclose(fp_query);
}
 
/*                        DEF: read_query_seq
 *Read the query sequence from .seq file     */
static void read_queryseq(FILE *fp_query, const int start, const int end, char *query_seq) {
  char str_buff[BUFFER_SIZE] = "";
  int i, read_num = 0, line_num;
  gboolean seq_begin=FALSE;
   
  while(!feof(fp_query)) {   
    fgets(str_buff, BUFFER_SIZE, fp_query);
    str_buff[strlen(str_buff)-1]='\0';
    if(strstr(str_buff, "Contig") != NULL) 
      break;
    line_num = strlen(str_buff);
    for(i = 0; i < line_num; i++)
      str_buff[i] = toupper(str_buff[i]);
    read_num += line_num;
    if(!seq_begin) {
      if(start < read_num) {
        if(end < read_num) {
          strncat(query_seq, str_buff + start - (read_num - line_num) , end - start + 1);
          break;
        } else { 
          strncat(query_seq, str_buff + start - (read_num - line_num), read_num - start);
          seq_begin = TRUE;
        } 
      }
    } else {
      if(end < read_num) {
        strncat(query_seq, str_buff, end - (read_num - line_num) + 1);
        seq_begin = FALSE;
        break;
      } else {
        strncat(query_seq, str_buff, line_num);
      } 
    }
  } 
}

/*                                 DEF: get_subjectseq
 *Read the subject sequence into string array subject_seq from .bes file*/
static void get_subjectseq(const GArray *dbs, char *subject_seq, const BSS_HIT *hit) {
  FILE *fp_subject;
  gchar *subject_name;
  gboolean find_subject = FALSE;
  int i, j;
  char str_buff[BUFFER_SIZE]= "";
  char str_tmp[50]="", *pos;  
 
  pos = strchr(hit->data[BSS_BES].str, '.');
  if(pos != NULL) {
    strncpy(str_tmp, hit->data[BSS_BES].str, pos - hit->data[BSS_BES].str);
    strcat(str_tmp, pos+1);
  }

  find_subject = FALSE;
  for(j = 0; j < dbs->len; j++) {
    subject_name = g_array_index(dbs, gchar*, j);
    if(subject_name[strlen(subject_name)-1] == ')') 
      subject_name[strlen(subject_name)-1] = '\0';
    
    fp_subject = fopen(subject_name, "r");
    g_assert(fp_subject != NULL);
   
    while(!feof(fp_subject)) {
      fgets(str_buff, BUFFER_SIZE-1, fp_subject);
      str_buff[strlen(str_buff)-1] = '\0';
      if(strstr(str_buff, hit->data[BSS_BES].str) || (pos != NULL && strstr(str_buff, str_tmp))) {
        find_subject = TRUE;
        while(strlen(subject_seq) < hit->data[BSS_TLEN].i) {
          fgets(str_buff, BUFFER_SIZE, fp_subject);
          str_buff[strlen(str_buff)-1] = '\0';
          for(i = 0; str_buff[i]!='\0'; i++) 
            str_buff[i] = toupper(str_buff[i]);
          strcat(subject_seq, str_buff);
        }
        break;
      }  
    } 
    fclose(fp_subject);
    if(find_subject) 
      break;
  }
}

/*                                 DEF: print_alignment 
 *Print out the aligment between the sequenced clone and the input BES */
static void 
print_alignment(const BSS_HIT *hit, int qalign_len, char *query_align, char *subject_align, int qstart, int max_score) {
  char *line_align, str_buff[BUFFER_SIZE]= "";
  int i, line_num;
   
  line_align = (char *)calloc(qalign_len, sizeof(char));
  if(line_align == NULL) 
    messcrash("ERROR---out of memory\n");

  for(i = 0; i < qalign_len; i++)  
    line_align[i] = (query_align[i] == subject_align[i]) ? '|' : ' ';
 
  printf("Score: %d\n", max_score);
  printf("Alignment:\n");
  memset(str_buff, 0, BUFFER_SIZE); 
  line_num = qalign_len/LINE_SIZE;
  
  for(i = 0; i < (line_num + 1); i++) {
    strncpy(str_buff, query_align + LINE_SIZE*i, LINE_SIZE);
    if(i != line_num) 
      printf("Query : %-8d %s  %d\n", qstart+i*LINE_SIZE, str_buff, qstart+(i+1)*LINE_SIZE-1);
    else
      printf("Query : %-8d %s  %d\n", qstart+i*LINE_SIZE, str_buff, qstart+qalign_len-1);

    strncpy(str_buff, line_align + LINE_SIZE*i, LINE_SIZE);
    printf("                 %s\n", str_buff);
    strncpy(str_buff, subject_align + LINE_SIZE*i, LINE_SIZE);
    if(i != line_num) {
      if(bss_is_rc(hit))
        printf("Sbjct : %-8d %s  %d\n", qalign_len-i*LINE_SIZE, str_buff, qalign_len-(i+1)*LINE_SIZE);
      else
        printf("Sbjct : %-8d %s  %d\n", i*LINE_SIZE+1, str_buff, (i+1)*LINE_SIZE);
    }
    else {
      if(bss_is_rc(hit)) 
        printf("Sbjct : %-8d %s  %d\n", qalign_len - i*LINE_SIZE, str_buff, 1);
      else 
        printf("Sbjct : %-8d %s  %d\n", i*LINE_SIZE+1, str_buff, strlen(query_align));
    }
    printf("\n\n");
  }

  free(line_align); 
  line_align = 0;
}

/*                          DEF: print_clonelist
 *Display the left or right MTP clone list */
static void print_clonelist(SeqCtg **s, direction mtp_dir, BOOL reversed) {
  int i, j, count = 0, hit_count;
  BSS_HIT* t;
  HitClone *h;
  char tmp_rc, end;
  SeqCtg *seq_ctg;
  HitClone **hits;

  if(((mtp_dir == LEFT) && reversed) || 
     ((mtp_dir == RIGHT) && !reversed)) 
    i = max_seqctg-1;
  else 
    i = 0;
 
  while(1) { 
    seq_ctg = s[i]; 
    hits = (HitClone **)malloc(seq_ctg->hits->len*sizeof(HitClone *));
    if(hits == NULL) 
      messcrash("ERROR---out of memory\n");

    hit_count = 0;
    for(j = 0; j < seq_ctg->hits->len; j++) {
      h = &g_array_index(seq_ctg->hits, HitClone, j);
      t = h->hit;
      if((mtp_dir == LEFT && seq_ctg->oret == RCN) || (mtp_dir == RIGHT && seq_ctg->oret == RCY))  {
        if(mtp_dir == LEFT) { 
          if(bss_is_rc(t) && (h->end_in == YIN || h->end_in == YUNDET)) 
            hits[hit_count++] = h;
        } else if(mtp_dir == RIGHT) {
          if(bss_is_rc(t) && (h->end_in == XIN || h->end_in == XUNDET))
            hits[hit_count++] = h;
        }
      } else {
        if(mtp_dir == LEFT) { 
          if(!bss_is_rc(t) && (h->end_in == YIN || h->end_in == YUNDET))
            hits[hit_count++] = h;
        } else if(mtp_dir == RIGHT) {
          if(!bss_is_rc(t) && (h->end_in == XIN || h->end_in == XUNDET))
            hits[hit_count++] = h;
        }
      }
    }

    if(hit_count > 0) {
      if((mtp_dir == LEFT && seq_ctg->oret == RCN) || (mtp_dir == RIGHT && seq_ctg->oret == RCY))   
        sort_hitclone_start(hits, hit_count);
      else 
        sort_hitclone_end(hits, hit_count);

      for(j = 0; j < hit_count; j++) {
        h = hits[j]; 
        t = h->hit;
        if(count == 0) {
          if(mtp_dir == LEFT) {
            printf("The leftmost clone is %s\n", t->data[BSS_CLONE].str);
            printf("The left clone list is:\n");
          } else {
            printf("The rightmost clone is %s\n", t->data[BSS_CLONE].str);
            printf("The right clone list is:\n");
          }
          printf("BES               RC SeqCtg Start     End       x       y"
                 "    CB_Olap   HitLen%%  Identity\n");
        }
    
        tmp_rc = bss_is_rc(t) ? 'y' : 'n';
        if(h->end) 
          end = '*';
        else 
          end = ' ';
        printf("%-18s%-c  %-7u%-10u%-10u%-8u%-8d%-8d%3d%%%c   %5u%%\n", 
              t->data[BSS_BES].str,tmp_rc, t->data[BSS_SEQCTG].i, 
                t->data[BSS_QSTART].i, t->data[BSS_QEND].i,  
              h->clp->x, h->clp->y, h->cb_olap, (int)h->pct_hitlen, end, t->data[BSS_IDENTITY].i);
        count++;
        if(count == top_num) 
          break;
      } 
    }
    free(hits);
    hits = 0;
    if(count == top_num) break;

    if(((mtp_dir == LEFT) && reversed) || 
     ((mtp_dir == RIGHT) && !reversed)) {
      i--;
      if(i < 0) break;
    } else {
      i++;
      if(i == max_seqctg) break;
    }     
  } 

  if(count == 0) {
    if(mtp_dir == LEFT) 
      printf("There is no left MTP clones!\n");
    else
      printf("There is no right MTP clones!\n");
  }
  printf("\n");
}

/*                     DEF: reverse_seq
 *Reverse the sequence                 */    
static void reverse_seq(char *str, int size) {
  char c;
  char *head, *tail;
  head = str;
  tail = str + size - 1;
  while(head < tail) {
    c = *head;
    *(head++) = *tail;
    *(tail--) = c;
  }
}

/*                     DEF: complement_seq
 *Complement the sequence                 */
static void complement_seq(char *seq, int size) {
  int i;
  
  for(i = 0; i < size; i++) {
    if(seq[i] == 'C') 
      seq[i] = 'G';  
    else if(seq[i] == 'G') 
      seq[i] = 'C';  
    else if(seq[i] == 'A') 
      seq[i] = 'T';  
    else if(seq[i] == 'T') 
      seq[i] = 'A';  
    else if(seq[i] == 'c') 
      seq[i] = 'g';  
    else if(seq[i] == 'g') 
      seq[i] = 'c';  
    else if(seq[i] == 'a') 
      seq[i] = 't';  
    else if(seq[i] == 't') 
      seq[i] = 'a';  
  }
}

/*                      DEF: get_scores
 *Compute the scores of query sequence and subject sequence array for dyanmic programming alignment*/
static void get_scores(const char *query_seq, const char *subject_seq, int query_len, int subject_len, Scores **scores) {
  int col_prev, row_prev, rowcol_prev, max;
  int i, j, p;
 
  for(i = 0; i < subject_len; i++) { 
    (scores[0]+i)->score = -2*i;
    (scores[0]+i)->direction = 'L';
  }

  for(i = 1; i < query_len ; i++) {
    (scores[i])->score = 0;
    (scores[i])->direction = 'U';
  }

  for(i = 1; i < query_len; i++) {
    for(j = 1; j < subject_len; j++) {
      col_prev = (scores[i]+j-1)->score-2;
      row_prev = (scores[i-1]+j)->score-2;
      p = (query_seq[i-1] == subject_seq[j-1]) ? 1 : -1; 
      rowcol_prev = (scores[i-1]+j-1)->score + p;

      if(row_prev >= rowcol_prev) {
        max = row_prev;
        (scores[i]+j)->score = max;
        (scores[i]+j)->direction = 'U';
      } else {
        max = rowcol_prev;
        (scores[i]+j)->score = max;
        (scores[i]+j)->direction = 'D';
      }
      if(col_prev > max) {
        max = col_prev;
        (scores[i]+j)->score = max;
        (scores[i]+j)->direction = 'L';
      }
    }
  }    
}

/*                         DEF: get_hits 
 *Get hit list satisfying the given paramters. */
void get_hits(HitList hit_list, SeqCtg *seq_ctg, int ctg, GArray *bes, BSS_INST* pbsi) {
  int i, j, start, end, index;
  float hit_len;
  HitClone h;
  char tmp_rc;
  BSS_HIT *t;
  CLONE *clp;
  struct contig *cptr;
  GArray *hit_clones;
  BOOL is_query = FALSE;
  BESInfo *b;
  int xend_rc_y, xend_rc_n, yend_rc_y, yend_rc_n;

  hit_clones = seq_ctg->hits;
  start = query_x-minolap;
  end = query_y+minolap;

  xend_rc_y = xend_rc_n = yend_rc_y = yend_rc_n = 0;

  for(i = 0; i < hit_list->len; i++) { 
    t = arrp(pbsi->bss_hits,g_array_index(hit_list,int, i),BSS_HIT);
    is_query = FALSE;
    
    tmp_rc = t->data[BSS_RC].str[0];
    for(cptr = root; cptr != NULL; cptr = cptr->new) {
      clp = arrp(acedata, cptr->next, CLONE);
    if(strcmp(clp->clone, t->data[BSS_CLONE].str) == 0)  
      break;
    } 
    hit_len = (float)(t->data[BSS_QEND].i - t->data[BSS_QSTART].i + 1);
    hit_len = hit_len*100/t->data[BSS_TLEN].i; 

    h.hit = t;
    h.clp = clp;
    h.pct_hitlen = hit_len;
    h.end = 0;
    h.parent_hit = 0;
    h.checked = 0;
    h.count = 1;

    /* Check if it is a parent BES hit */
    for(j = 0; j < num_sclone; j++) {
      if(!strcmp(t->data[BSS_CLONE].str, seq_clone[j]->clone)) {  
        is_query = TRUE;
        break;
      }
    }
 
    if(!is_query) { /*Not a parent BES hit */
      /* Check if satisfying the parameters */
      if((clp->x >= start && clp->x <=end) || (clp->y >=start && clp->y<=end)) {
        if(t->data[BSS_IDENTITY].i >= minId) {
          if(t->data[BSS_QSTART].i <= seq_error || t->data[BSS_QEND].i >= (seq_ctg->size-seq_error) || hit_len >= minHitLenPer) {
            if(t->data[BSS_QSTART].i <= seq_error || t->data[BSS_QEND].i >= (seq_ctg->size-seq_error)) 
              h.end = 1; 

            h.cb_olap = min(query_y, clp->y) - max(query_x, clp->x) + 1;
             
            if((clp->x >= start && clp->x <= end) && (clp->y >= start && clp->y <= end)) { 
              if(clp->x < (query_x-FPC_error) && clp->y < query_y) 
                h.end_in = YIN;
              else if(clp->x >= (query_x-FPC_error) && clp->x < query_x && clp->y < query_y) 
                h.end_in = YUNDET;
              else if(clp->y > (query_y+FPC_error) && clp->x > query_x)
                h.end_in = XIN;
              else if(clp->y <= (query_y+FPC_error) && clp->y > query_y && clp->x > query_x)
                h.end_in = XUNDET;
              else  
                h.end_in = XYIN;
            }
            else if(clp->x >= start && clp->x <= end && clp->y > end) 
              h.end_in = XIN;
            else 
              h.end_in = YIN;

            index = in_bes(bes, t);
            if(index != -1) {
              b = &g_array_index(bes, BESInfo, index);
              g_ptr_array_add(b->seqctg_list, seq_ctg);
              h.bes_index = index;
            } else { 
              add_to_bes(bes, seq_ctg, t);
              h.bes_index = bes->len-1;
            }

            if(next_mtp) { /*Find next MTP clones */ 
              /*If the clone is embeded, the hit will not be added into the list */
              if(h.end_in != XYIN)  
                g_array_append_val(hit_clones, h);
            } else {
              h.neigh_hits = g_ptr_array_new();
              h.dist_hits = g_ptr_array_new();
              g_array_append_val(hit_clones, h);
            }
            if(h.end_in == XIN) {
              if(tmp_rc == 'y') xend_rc_y++;
              else xend_rc_n++;
            } else if(h.end_in == YIN) {
              if(tmp_rc == 'y') yend_rc_y++;
              else yend_rc_n++;
            }
          } 
        }   
      }     
    } 
    else {/*If it is a parent BES hit */ 
      h.end_in = XYIN;
      if(t->data[BSS_QSTART].i <= seq_error || t->data[BSS_QEND].i >= (seq_ctg->size-seq_error))  
        h.end = 1;
      
      h.parent_hit = 1;

      if(!next_mtp) {
        index = in_bes(bes, t);
        if(index != -1)  {
          b = &g_array_index(bes, BESInfo, index);
          g_ptr_array_add(b->seqctg_list, seq_ctg);
          h.bes_index = index;
        } else { 
          add_to_bes(bes, seq_ctg, t);
          h.bes_index = bes->len-1;
        } 
        h.neigh_hits = g_ptr_array_new();
        h.dist_hits = g_ptr_array_new();
        g_array_append_val(hit_clones, h);
      }
    }
  }

  if(next_mtp) {
    seq_ctg->rc_n = yend_rc_y + xend_rc_n;
    seq_ctg->rc_y = xend_rc_y + yend_rc_n;
    if(seq_ctg->rc_y > seq_ctg->rc_n) 
      seq_ctg->oret = RCY; 
    else if(seq_ctg->rc_y < seq_ctg->rc_n)  
      seq_ctg->oret = RCN; 
  } 
}

/*                         DEF: add_to_bes 
 *Add the BES info to bes list  */
static void add_to_bes(GArray *bes, SeqCtg *seq_ctg, BSS_HIT *t) {
   BESInfo b;

   b.seqctg_list = g_ptr_array_new(); 
   b.in_neigh = 0;
   strcpy(b.name, t->data[BSS_BES].str);
   g_ptr_array_add(b.seqctg_list, seq_ctg);
   g_array_append_val(bes, b);
}

/*                         DEF: in_bes 
 *Check if the bes hit is already in bes list */
int in_bes(GArray *bes, BSS_HIT *t) {
   int j;
   BESInfo *b;

   for(j = 0; j < bes->len; j++) {
     b = &g_array_index(bes, BESInfo, j);
    /*If the same BES hit multiple seq ctgs */
     if(!strcmp(b->name, t->data[BSS_BES].str))  
       return j; 
   }
   return -1;
}


static int min(int a, int b) {
   return a <= b ? a : b;
}

/*                         DEF: max 
 *Get the smaller one from two integers */
static int max(int a, int b) {
   return a >= b ? a : b;
}

/*                         DEF: find_contained  
 * Find the contained clones which has both ends hit*/
static void find_contained(SeqCtg **s) {
  SeqCtg *s1, *s2;
  GArray *hits1, *hits2;
  HitClone *h1, *h2;
  int i, j, k, l;  
  CLONE *clp1, *clp2;
  BSS_HIT *t1, *t2;
 
  for(i = 0; i < max_seqctg; i++) {
    s1 = s[i];
    hits1 = s1->hits;

    /*Traverse all hits in SeqCtg1 */
    for(j = 0; j < hits1->len; j++) {
      h1 = &g_array_index(hits1, HitClone, j);
      t1 = h1->hit;
      clp1 = h1->clp;      

      /*check all hits in other seqctgs or itself*/ 
      for(k = 0; k < max_seqctg; k++) {
        s2 = s[k]; 
        hits2 = s2->hits;

        /*Traverse all hits in SeqCtg2 */
        for(l = 0; l < hits2->len; l++) {
          h2 = &g_array_index(hits2, HitClone, l);
          t2 = h2->hit;
          clp2 = h2->clp;

          /*if bes1 and bes2 hit the same clone */
          if(!strcmp(clp1->clone, clp2->clone)) { 
            if(strcmp(t1->data[BSS_BES].str, t2->data[BSS_BES].str)) {
              h1->end_in = XYIN;
              h2->end_in = XYIN;
            } 
          } 
        }  /*end of for of l */
      } /*end of for of k */   
    } /*end of for of j */
  } /* end of for of i */
}

/*                   DEF: sort_seqctg_asce 
 *Sort sequence contigs in ascending order of original SeqCtg number*/
void sort_seqctg_asce(SeqCtg** seq_ctgs, int size)
{
  qsort(seq_ctgs, size, sizeof(SeqCtg *),
    (int (*)(const void*, const void*))seqctg_compare_asce);
}


static gint
seqctg_compare_asce(const SeqCtg** c1, const SeqCtg** c2) 
{

  if((*c1)->seqctg < (*c2)->seqctg) 
    return -1;
  else if((*c1)->seqctg > (*c2)->seqctg) 
    return 1;
  else
    return 0;
}

/*                   DEF: sort_hitclone_start 
 *Sort hits in ascending order of the start points*/
void sort_hitclone_start(HitClone **hit_clones, int size) {
  qsort(hit_clones, size, sizeof(HitClone*),
    (int (*)(const void*, const void*))hitclone_compare_start);
}

static int hitclone_compare_start(const HitClone **h1, const HitClone **h2) {
  if((*h1)->hit->data[BSS_QSTART].i < (*h2)->hit->data[BSS_QSTART].i)
    return -1;
  else if((*h1)->hit->data[BSS_QSTART].i > (*h2)->hit->data[BSS_QSTART].i)
    return 1;
  else
    return 0;
}

/*                   DEF: sort_hitclone_end 
 *Sort hits in descending order of the end points*/
static void sort_hitclone_end(HitClone **hit_clones, int size) {
  qsort(hit_clones, size, sizeof(HitClone*),
    (int (*)(const void*, const void*))hitclone_compare_end);
}

static int hitclone_compare_end(const HitClone **h1, const HitClone **h2) {
  if((*h1)->hit->data[BSS_QEND].i > (*h2)->hit->data[BSS_QEND].i)
    return -1;
  else if((*h1)->hit->data[BSS_QEND].i < (*h2)->hit->data[BSS_QEND].i)
    return 1;
  else
    return 0;
}

